library(tidyverse)
library(readxl)
library(sf)
library(leaflet)
# Load the geocoded entries workbook (the first row holds the column names,
# which is the read_excel default) and preview its first rows.
geocoded_entries <- read_excel("geocoded_entries.xlsx")
geocoded_entries %>%
  head()
What proportion of the addresses detected by NER are precisely geocoded?
# Share of rows with / without a precise geometry.
# Each row is flagged TRUE when `precise.geom` is present, the two cases are
# counted, turned into frequencies, and drawn as one bar per case.
geocoded_entries %>%
  select(entry_id, precise.geom) %>%
  mutate(geometry = !is.na(precise.geom)) %>%
  count(geometry) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(x = geometry, y = freq)) + # y as frequency
  geom_col() +
  theme_bw() +
  labs(
    title = "<LOC> and <CARD> elements precisely geocoded from the entries",
    caption = "ANR SoDUCo. Data: GeoHistoricalData"
  )
What is the proportion of entries which are precisely geocoded?
# Entries that appear on exactly one row of `geocoded_entries`:
# number and share of rows with ("VRAI") and without ("FAUX") a precise
# geometry. `order_n` is the number of rows sharing the same `entry_id`,
# which is fixed to 1 for this table.
order1 <- geocoded_entries %>%
  select(entry_id, precise.geom, order) %>%
  mutate(geometry = if_else(is.na(precise.geom), "FAUX", "VRAI")) %>%
  add_count(entry_id, name = "order_n") %>%
  filter(order_n == 1) %>%
  count(geometry) %>%
  mutate(
    freq = n / sum(n),
    order_n = 1
  ) %>%
  relocate(order_n)
# Entries that appear on several rows of `geocoded_entries`:
# for every `order_n` (number of rows sharing the same `entry_id`), number and
# share of rows with ("VRAI") and without ("FAUX") a precise geometry.
# Both categories are always present for each `order_n`, even with a count of 0.
# NOTE: the resulting tibble is still grouped by `order_n`.
notorder1 <- geocoded_entries %>%
  select(entry_id, precise.geom, order) %>%
  mutate(geometry = if_else(is.na(precise.geom), "FAUX", "VRAI")) %>%
  add_count(entry_id, name = "order_n") %>%
  filter(order_n != 1) %>%
  group_by(order_n) %>%
  summarise(
    FAUX = sum(geometry == "FAUX"),
    VRAI = sum(geometry == "VRAI")
  ) %>%
  pivot_longer(
    cols = c(FAUX, VRAI),
    names_to = "geometry",
    values_to = "n"
  ) %>%
  group_by(order_n) %>%
  mutate(freq = n / sum(n))
# Very few entries have more than 8 localisations: total number of elements
# found in those entries.
bind_rows(order1, notorder1) %>%
  filter(order_n > 8) %>%
  summarise(countelements = sum(n))
# Share of precisely geocoded ("VRAI") vs. not geocoded ("FAUX") elements,
# with one dodged bar per number of localisations in the entry (`order_n`).
# Only the rare entries with more than 8 localisations are left out; entries
# with exactly 8 are kept (the previous `< 8` filter silently dropped them).
bind_rows(order1, notorder1) %>%
  filter(order_n <= 8) %>%
  ggplot(
    mapping = aes(x = geometry, y = freq, fill = as.character(order_n))
  ) + # y as frequency
  geom_col(position = "dodge") +
  theme_bw() +
  ggtitle("Entries precisely geocoded") +
  labs(caption = "ANR SoDUCo. Data: GeoHistoricalData")
As we can see, some geocoded locations are far from Paris, such as the entries on St-Martin street that are located on the island.
Entries only in the Ile-de-France region
## Reading layer `iledefranceosm' from data source
## `/data/user/g/jgravier/JE_soduco_2022_10_11/iledefranceosm.gpkg'
## using driver `GPKG'
## Simple feature collection with 1 feature and 113 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1.446244 ymin: 48.12015 xmax: 3.559221 ymax: 49.24143
## Geodetic CRS: WGS 84